x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n1 <- c(6,13,18,28,52,53,61,60)
n0 <- c(59,60,62,56,63,59,62,60) - n1
\[log(\frac{p}{1-p})=\beta_1+\beta_2x \Leftrightarrow P(y=1|x) = \frac{e^{\beta_1 + \beta_2x}}{1+e^{\beta_1 + \beta_2x}}\]
\[l((x_k,n_k)_k; \beta)=\prod_{j=1}^k(\prod_{i=1}^{n_j}(\pi_j)^{y_{ij}}(1-\pi_j)^{1-y_{ij}})\]
\[l((x_k,n_k)_k; \beta)=\prod_{j=1}^k(\prod_{i=1}^{n_j}(\frac{e^{\beta_1+\beta_2x_j}}{1 + e^{\beta_1+\beta_2x_j}})^{y_{ij}}(\frac{1}{1 + e^{\beta_1+\beta_2x_j}})^{1-y_{ij}})\]
\[l((x_k,n_k)_k; \beta)=\prod_{j=1}^k(\frac{e^{\beta_1+\beta_2x_j}}{1 + e^{\beta_1+\beta_2x_j}})^{n_j^1}(\frac{1}{1 + e^{\beta_1+\beta_2x_j}})^{n_j^0}\]
\[L((x_k,n_k)_k; \beta) = \sum_{j=1}^k\left(n_j^1log(\frac{e^{\beta_1+\beta_2x_j}}{1 + e^{\beta_1+\beta_2x_j}}) + n_j^0log(\frac{1}{1 + e^{\beta_1+\beta_2x_j}})\right)\]
\[L((x_k,n_k)_k; \beta) = \sum_{j=1}^k\left(-n_j^1log(1 + e^{-(\beta_1+\beta_2x_j)}) - n_j^0(\beta_1+\beta_2x_j) - n_j^0log(1 + e^{-(\beta_1+\beta_2x_j)})\right)\]
\[U(\beta) = \sum_{j=1}^k\left(n^1_j(1-\pi_j) - n^0_j\pi_j,\; x_j(n^1_j(1-\pi_j) - n^0_j\pi_j)\right)\]
\[I_{11}=-\frac{\partial^2 L}{\partial \beta_1^2} = \sum_{j=1}^k(\frac{n^1_j(1-\pi_j)}{\pi_j} -\frac{n^0_j\pi_j}{1-\pi_j})\] \[I_{22}=-\frac{\partial^2 L}{\partial \beta_2^2} = \sum_{j=1}^k x_j^2(\frac{n^1_j(1-\pi_j)}{\pi_j} -\frac{n^0_j\pi_j}{1-\pi_j})\]
\[I_{12}=-\frac{\partial^2 L}{\partial \beta_1 \partial \beta_2} = \sum_{j=1}^k x_j(\frac{n^1_j(1-\pi_j)}{\pi_j} -\frac{n^0_j\pi_j}{1-\pi_j})\]
\[I_{21}=I_{12}\]
\[w_j := \frac{n^1_j(1-\pi_j)}{\pi_j} - \frac{n^0_j\pi_j}{1-\pi_j}\]
\[I(\beta)^{-1} = \frac{\begin{bmatrix}\sum_{j=1}^k x_j^2w_j & -\sum_{j=1}^k x_jw_j\\ -\sum_{j=1}^k x_jw_j & \sum_{j=1}^k w_j\end{bmatrix}}{(\sum_{j=1}^k x_j^2w_j)(\sum_{j=1}^k w_j) - (\sum_{j=1}^k x_jw_j)^2}\]
\[ \beta_{n+1} = \beta_n + (I(\beta_n))^{-1}U(\beta_n), n \ge 1\]
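As a sketch, one scoring step under the expressions above can be written directly; the names scoring_step, score and inv_info are illustrative, and the concrete U and I used in this document appear in the code section below.
scoring_step <- function(beta, score, inv_info) {
# one step of beta <- beta + I(beta)^{-1} U(beta), assuming score() and
# inv_info() implement U(beta) and I(beta)^{-1} from the formulas above
beta + inv_info(beta) %*% score(beta)
}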
Options considered for cross-checking the fit: glm; regressing log_odds ~ x (fit.lm); small values; visualization. A sketch of the first two follows.
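A minimal sketch of those two options; the 0.5 continuity correction on the empirical log-odds is our addition, to avoid log(0) at extreme doses.
## sketch: the glm option
fit_glm <- glm(cbind(n1, n0) ~ x, family = binomial())
## sketch: least squares on the empirical log-odds
p_emp <- (n1 + 0.5) / (n1 + n0 + 1)   # continuity-corrected proportions
fit_lm <- lm(log(p_emp / (1 - p_emp)) ~ x)
coef(fit_glm)
coef(fit_lm)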
\[D = 2[L(\beta_{max};y) - L(\beta;y)] \]
General Formula:
\[S_w = \sum_{j=1}^k\frac{(y_j - E(y_j))^2}{Var(y_j)}\]
\[ X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\hat{\pi}_j)^2}{(n_j^1+n_j^0)\hat{\pi}_j(1-\hat{\pi}_j)} \sim \chi^2_{(k-2)}\]
We obtain:
\[X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0) (\frac{e^{\hat{\beta}_0 + \hat{\beta}_1x_j}}{1 + e^{\hat{\beta}_0 + \hat{\beta}_1x_j}}))^2}{(n_j^1+n_j^0) (\frac{e^{\hat{\beta}_0 + \hat{\beta}_1x_j}}{1 + e^{\hat{\beta}_0 + \hat{\beta}_1x_j}})(\frac{1}{1 + e^{\hat{\beta}_0 + \hat{\beta}_1x_j}})} \]
\[d_j = sign(y_j - (n_j^1+n_j^0)\hat{\pi}_j)\sqrt{2[y_jlog(\frac{y_j}{(n_j^1+n_j^0)\hat{\pi}_j})+((n_j^1+n_j^0)-y_j)log(\frac{(n_j^1+n_j^0)-y_j}{(n_j^1+n_j^0)(1 - \hat{\pi}_j)})]}\]
\[r_j = \frac{y_j - (n_j^1+n_j^0)\hat{\pi}_j}{\sqrt{\hat{Var}(y_j)}}\]
\[ r_j = \frac{y_j - (n_j^1+n_j^0) (\frac{e^{\hat{\beta}_0 + \hat{\beta}_1x_j}}{1 + e^{\hat{\beta}_0 + \hat{\beta}_1x_j}})}{\sqrt{(n_j^1+n_j^0) (\frac{e^{\hat{\beta}_0 + \hat{\beta}_1x_j}}{1 + e^{\hat{\beta}_0 + \hat{\beta}_1x_j}})(\frac{1}{1 + e^{\hat{\beta}_0 + \hat{\beta}_1x_j}})}} \]
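These quantities can be checked numerically. A sketch using R's glm fit; the names fit, n_tot and pi_hat are illustrative.
fit <- glm(cbind(n1, n0) ~ x, family = binomial())
n_tot <- n1 + n0
pi_hat <- fitted(fit)                    # fitted pi_j
X2 <- sum((n1 - n_tot*pi_hat)^2 / (n_tot*pi_hat*(1 - pi_hat)))  # Pearson X^2
D <- deviance(fit)                       # D = 2[L(beta_max; y) - L(beta_hat; y)]
r <- residuals(fit, type = "pearson")    # r_j as above
d <- residuals(fit, type = "deviance")   # d_j as above
c(X2 = X2, D = D)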
\[p=\Phi(\beta_1+\beta_2x)\]
\[\prod_{j=1}^k\Phi(\beta_1+\beta_2x_j)^{n^1_j}(1 - \Phi(\beta_1+\beta_2x_j))^{n^0_j}\]
\[L(X,\beta_1, \beta_2) = \sum_{j=1}^k (n^1_jlog(\Phi(\beta_1+\beta_2x_j)) + n^0_jlog(1- \Phi(\beta_1+\beta_2x_j)))\]
\[L(X,\beta_1, \beta_2) = \sum_{j=1}^k (n^1_jlog(\int_{-\infty}^{\beta_1 + \beta_2x_j}\frac{e^{-\frac{t^2}{2}}}{\sqrt{2\pi}}dt) + n^0_jlog(1- \int_{-\infty}^{\beta_1 + \beta_2x_j}\frac{e^{-\frac{t^2}{2}}}{\sqrt{2\pi}}dt))\]
With the notation below, and using the expected Fisher information:
\[\eta_j := \beta_1+\beta_2x_j, \quad \varphi(t) := \frac{e^{-\frac{t^2}{2}}}{\sqrt{2\pi}}\]
\[U(\beta) = (\sum_{j=1}^k \varphi(\eta_j)(\frac{n^1_j}{\Phi(\eta_j)} - \frac{n^0_j}{1-\Phi(\eta_j)}), \sum_{j=1}^k x_j\varphi(\eta_j)(\frac{n^1_j}{\Phi(\eta_j)} - \frac{n^0_j}{1-\Phi(\eta_j)}))\]
\[w_j := \frac{(n^1_j+n^0_j)\varphi(\eta_j)^2}{\Phi(\eta_j)(1-\Phi(\eta_j))}\]
\[I(\beta) = \begin{bmatrix} \sum_{j=1}^k w_j & \sum_{j=1}^k x_jw_j\\ \sum_{j=1}^k x_jw_j & \sum_{j=1}^k x_j^2w_j \end{bmatrix}\]
\[I(\beta)^{-1} = \frac{\begin{bmatrix} \sum_{j=1}^k x_j^2w_j & -\sum_{j=1}^k x_jw_j\\ -\sum_{j=1}^k x_jw_j & \sum_{j=1}^k w_j \end{bmatrix}}{(\sum_{j=1}^k w_j)(\sum_{j=1}^k x_j^2w_j) - (\sum_{j=1}^k x_jw_j)^2}\]
\[ \beta_{n+1} = \beta_n + (I(\beta_n))^{-1}U(\beta_n), n \ge 1\]
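For comparison with the scoring iteration above, a sketch of the probit fit using R's built-in IRLS; pnorm and dnorm play the roles of Phi and phi.
fit_probit <- glm(cbind(n1, n0) ~ x, family = binomial(link = "probit"))
coef(fit_probit)
eta_hat <- predict(fit_probit, type = "link")  # beta1_hat + beta2_hat * x
pnorm(eta_hat)                                 # Phi(eta_j), equals fitted(fit_probit)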
Options considered for cross-checking, as above: glm; log_odds ~ x (fit.lm); small values; visualization.
\[D = 2[L(\beta_{max};y) - L(\beta;y)] \]
General Formula:
\[S_w = \sum_{j=1}^k\frac{(y_j - E(y_j))^2}{Var(y_j)}\]
\[ X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\hat{\pi}_j)^2}{(n_j^1+n_j^0)\hat{\pi}_j(1-\hat{\pi}_j)} \sim \chi^2_{(k-2)}\]
We obtain:
\[X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j))^2}{(n_j^1+n_j^0)\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)(1-\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j))} \]
\[d_j = sign(y_j - (n_j^1+n_j^0)\hat{\pi}_j)\sqrt{2[y_jlog(\frac{y_j}{(n_j^1+n_j^0)\hat{\pi}_j})+((n_j^1+n_j^0)-y_j)log(\frac{(n_j^1+n_j^0)-y_j}{(n_j^1+n_j^0)(1 - \hat{\pi}_j)})]}\]
\[r_j = \frac{y_j - (n_j^1+n_j^0)\hat{\pi}_j}{\sqrt{\hat{Var}(y_j)}}\]
\[ r_j = \frac{y_j - (n_j^1+n_j^0)\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)}{\sqrt{(n_j^1+n_j^0)\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j)(1-\Phi(\hat{\beta}_1 + \hat{\beta}_2x_j))}} \]
\[log(-log(1-p))=\beta_1+\beta_2x \Leftrightarrow P(y=1|x) = 1 - e^{-e^{\beta_1 + \beta_2x}}\]
\[l((x_k,n_k)_k; \beta)= \prod_{j=1}^k (1 - e^{-e^{(\beta_1 + \beta_2 x_j)}})^{n^1_j}(e^{-e^{(\beta_1 + \beta_2 x_j)}})^{n^0_j}\]
\[L(X,\beta_1, \beta_2) = log(\prod_{j=1}^k (1 - e^{-e^{(\beta_1 + \beta_2 x_j)}})^{n^1_j}(e^{-e^{(\beta_1 + \beta_2 x_j)}})^{n^0_j}) = \sum_{j=1}^k (n^1_j log(\pi_j) + n^0_jlog(1-\pi_j))\]
\[U(\beta) = (\sum_{j=1}^k\frac{e^{-e^{(\beta_1 + \beta_2 x_j)}+(\beta_1 + \beta_2 x_j)}}{(1 - e^{-e^{\beta_1 + \beta_2 x_j}})}(n^1_j-n^0_j), \sum_{j=1}^k\frac{x_je^{-e^{(\beta_1 + \beta_2 x_j)}+(\beta_1 + \beta_2 x_j)}}{(1 - e^{-e^{\beta_1 + \beta_2 x_j}})}(n^1_j-n^0_j))\]
\[I(\beta) = \begin{bmatrix} \sum_{j=1}^k\frac{(e^{-e^{(\beta_1 + \beta_2 x_j)}+(\beta_1 + \beta_2 x_j)})(e^{-e^{\beta_1 + \beta_2 x_j}} + e^{\beta_1 + \beta_2 x_j} - 1)}{(1 - e^{-e^{\beta_1 + \beta_2 x_j}})^2}(n^1_j-n^0_j) & \sum_{j=1}^k\frac{x_j(e^{-e^{(\beta_1 + \beta_2 x_j)}+(\beta_1 + \beta_2 x_j)})(e^{-e^{\beta_1 + \beta_2 x_j}} + e^{\beta_1 + \beta_2 x_j} - 1)}{(1 - e^{-e^{\beta_1 + \beta_2 x_j}})^2}(n^1_j-n^0_j)\\ \sum_{j=1}^k\frac{x_j(e^{-e^{(\beta_1 + \beta_2 x_j)}+(\beta_1 + \beta_2 x_j)})(e^{-e^{\beta_1 + \beta_2 x_j}} + e^{\beta_1 + \beta_2 x_j} - 1)}{(1 - e^{-e^{\beta_1 + \beta_2 x_j}})^2}(n^1_j-n^0_j) & \sum_{j=1}^k\frac{x_j^2(e^{-e^{(\beta_1 + \beta_2 x_j)}+(\beta_1 + \beta_2 x_j)})(e^{-e^{\beta_1 + \beta_2 x_j}} + e^{\beta_1 + \beta_2 x_j} - 1)}{(1 - e^{-e^{\beta_1 + \beta_2 x_j}})^2}(n^1_j-n^0_j) \end{bmatrix}\]
Equivalently, with the shorthand:
\[\mu_j(\beta) := e^{\beta_1 + \beta_2 x_j}\]
\[I(\beta) = \begin{bmatrix} \sum_{j=1}^k\frac{(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j) & \sum_{j=1}^k\frac{x_j(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j)\\ \sum_{j=1}^k\frac{x_j(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j) & \sum_{j=1}^k\frac{x_j^2(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j) \end{bmatrix}\]
\[I(\beta)^{-1} = \frac{\begin{bmatrix} \sum_{j=1}^k\frac{x_j^2(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j) & -\sum_{j=1}^k\frac{x_j(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j)\\ -\sum_{j=1}^k\frac{x_j(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j) & \sum_{j=1}^k\frac{(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)}{(1 - e^{-\mu_j(\beta)})^2}(n^1_j - n^0_j) \end{bmatrix}}{(\sum_{j=1}^k\frac{x_j^2(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)(n^1_j - n^0_j)}{(1 - e^{-\mu_j(\beta)})^2})(\sum_{j=1}^k\frac{(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)(n^1_j - n^0_j)}{(1 - e^{-\mu_j(\beta)})^2}) - (\sum_{j=1}^k\frac{x_j(e^{-\mu_j(\beta)}\mu_j(\beta))(e^{-\mu_j(\beta)} + \mu_j(\beta) - 1)(n^1_j - n^0_j)}{(1 - e^{-\mu_j(\beta)})^2})^2}\]
\[ \beta_{n+1} = \beta_n + (I(\beta_n))^{-1}U(\beta_n), n \ge 1\]
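Likewise, a sketch of the complementary log-log fit via glm, as a reference point for the scoring iteration above.
fit_cll <- glm(cbind(n1, n0) ~ x, family = binomial(link = "cloglog"))
coef(fit_cll)
fitted(fit_cll)   # pi_hat_j = 1 - exp(-exp(beta1_hat + beta2_hat*x_j))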
Options considered for cross-checking, as above: glm; log_odds ~ x (fit.lm); small values; visualization.
\[D = 2[L(\beta_{max};y) - L(\beta;y)] \]
General Formula:
\[S_w = \sum_{j=1}^k\frac{(y_j - E(y_j))^2}{Var(y_j)}\]
\[ X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)\hat{\pi}_j)^2}{(n_j^1+n_j^0)\hat{\pi}_j(1-\hat{\pi}_j)} \sim \chi^2_{(k-2)}\]
We obtain:
\[X^2 = \sum_{j=1}^k \frac{(y_j - (n_j^1+n_j^0)(1 - e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}))^2}{(n_j^1+n_j^0)(1 - e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}})e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}} \]
\[d_j = sign(y_j - (n_j^1+n_j^0)\hat{\pi}_j)\sqrt{2[y_jlog(\frac{y_j}{(n_j^1+n_j^0)\hat{\pi}_j})+((n_j^1+n_j^0)-y_j)log(\frac{(n_j^1+n_j^0)-y_j}{(n_j^1+n_j^0)(1 - \hat{\pi}_j)})]}\]
\[r_j = \frac{y_j - (n_j^1+n_j^0)\hat{\pi}_j}{\sqrt{\hat{Var}(y_j)}}\]
\[ r_j = \frac{y_j - (n_j^1+n_j^0)(1 - e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}})}{\sqrt{(n_j^1+n_j^0)(1 - e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}})e^{-e^{\hat{\beta}_1 + \hat{\beta}_2x_j}}}} \]
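Since the same goodness-of-fit block applies to all three links, a quick sketch comparing their residual deviances on these data:
links <- c("logit", "probit", "cloglog")
sapply(links, function(l) deviance(glm(cbind(n1, n0) ~ x, family = binomial(link = l))))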
library(ggplot2)
library(dplyr)
x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n1 <- c(6,13,18,28,52,53,61,60)
n0 <- c(59,60,62,56,63,59,62,60) - n1
p <- n1/(n0+n1)
df <- data.frame(x=x, p=p)
df %>% ggplot(aes(x=x, y=p)) +
geom_line() +
geom_point(color="red") +
labs(
title= "Observed proportion as a function of dose x",
x="Dose x",
y="Observed proportion p"
)
mean_val <- mean(x)
sd_val <- sd(x)
lower_limit <- mean_val - 2*sd_val
upper_limit <- mean_val + 2*sd_val
df <- data.frame(
index = 1:length(x),
value = x
)
ggplot(df, aes(x=index, y=value)) +
geom_point(size=3) +
geom_hline(yintercept=mean(df$value), color="blue", linewidth=1.2) +
geom_hline(yintercept=upper_limit, color="red", linetype="dashed") +
geom_hline(yintercept=lower_limit, color="red", linetype="dashed") +
theme_minimal() +
labs(title="No under/over-representation justifying weighting the log-likelihood function before optimization",
x="Sample Index",
y="Deviation to the mean")
library(ggplot2)
library(tidyr)
library(dplyr)
##data
x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
x_scaled <- (x-mean(x))/sd(x)
n1 <- c(6 ,13,18,28,52,53,61,60)
n0 <- c(59,60,62,56,63,59,62,60) - n1
## general
logistic <- function(z) {
return(ifelse(z >= 0,
1 / (1 + exp(-z)),
exp(z) / (1 + exp(z))))
}
prob <- function(beta) {
z <- beta[1] + beta[2]*x
return(logistic(z))
}
log_lik <- function(beta) {
p <- prob(beta)
return(sum(n1*log(p+eps)+ n0*log(1-p+eps))) # eps (set in the parameter block below) guards against log(0)
}
## Score
U <- function(beta) {
U <- matrix(nrow=2, ncol=1)
p <- prob(beta)
U[1] <- sum(n1*(1-p)-n0*p)
U[2] <- sum(x*(n1*(1-p)-n0*p))
return(U)
}
stoch_U <- function(beta, index) {
G <- matrix(nrow=2, ncol=1)
p <- prob(beta)
w1 <- n1*(1-p)-n0*p
w2 <- x*(n1*(1-p)-n0*p)
G[1] <- sum(w1[index])
G[2] <- sum(w2[index])
return(G)
}
##Fisher Information
stoch_I <- function(beta, index) {
com <- matrix(ncol=2, nrow=2)
p <- prob(beta)
v1 <- x^2*(n1*(1-p)/(p+eps) - (n0*p)/(1-p+eps))
v2 <- -x*(n1*(1-p)/(p+eps) - n0*p/(1-p+eps))
v3 <- v2
v4 <- n1*(1-p)/(p+eps) - n0*p/(1-p+eps)
com[1,1] <- sum(v1[index])
com[1,2] <- sum(v2[index])
com[2,1] <- com[1,2]
com[2,2] <- sum(v4[index])
det <- com[1,1]*com[2,2] - com[1,2]^2
if (abs(det) < 1e-6) {
warning("Determinant too small, unstable inversion")
det <- 1e6   # fallback: shrink the step instead of dividing by ~0
}
return(com/(det+eps))
}
I <- function(beta) {
com <- matrix(ncol=2, nrow=2)
p <- prob(beta)
com[1,1] <- sum(x^2*(n1*(1-p)/(p+eps) - (n0*p)/(1-p+eps)))
com[1,2] <- -sum(x*(n1*(1-p)/(p+eps) - n0*p/(1-p+eps)))
com[2,1] <- com[1,2]
com[2,2] <- sum(n1*(1-p)/(p+eps) - n0*p/(1-p+eps))
det <- (com[1,1]*com[2,2] - com[1,2]^2)
if (abs(det) < 1e-6) {
warning("Determinant too small, unstable inversion")
det <- 1e6   # fallback: shrink the step instead of dividing by ~0
}
return(com/(det+eps))
}
## EMV Algo
EMV_algo <- function(beta, n) {
new_beta <- beta + (I(beta)%*%U(beta)) # I() as coded already returns the inverse information
return(new_beta)
}
EMV_algo_lambda <- function(beta, n) {
ll_old <- log_lik(beta)
step <- 1
if (n > 20000) {
param <- sqrt(lambda/n)
} else {
param <- lambda/sqrt(n)
}
for (i in 1:10) {
beta_check <- beta + param*step*(I(beta)%*%U(beta))
ll_new <- log_lik(beta_check)
if (all(is.finite(beta_check)) && ll_new >= ll_old) {
break
}
step <- step / 2
}
beta_star <- beta_check
return(beta_star)
}
EMV_algo_stoch <- function(beta, n, index) {
ll_old <- log_lik(beta)
step <- 1
if (n > 20000) {
param <- sqrt(lambda/n)
} else {
param <- lambda/sqrt(n)
}
for (i in 1:10) {
beta_check <- beta + param*step*(stoch_I(beta, index)%*%stoch_U(beta, index))
ll_new <- log_lik(beta_check)
if (all(is.finite(beta_check)) && ll_new >= ll_old) {
break
}
step <- step / 2
}
beta_star <- beta_check
return(beta_star)
}
##EMV call
resu_EMV_Batch <- function(beta_H1, beta_H2) {
z <- rep(NA, N)
z[1] <- log_lik(c(beta_H1[1], beta_H2[1]))
for (j in 2:N) {
resu <- EMV_algo(c(beta_H1[j-1],beta_H2[j-1]),j)
beta_H1[j] <- resu[1]
beta_H2[j] <- resu[2]
param <- c(resu[1], resu[2])
z[j] <- log_lik(param)
if (sqrt(sum((resu - c(beta_H1[j-1],beta_H2[j-1]))^2))< nu) {
message("Convergence atteinte à l'itération ", j)
break
}
}
return(c(beta_H1, beta_H2, z))
}
resu_EMV_SG <- function(beta_H1S, beta_H2S) {
for (j in 2:N) {
index <- sample(length(x), floor(3*length(x)/4))
resuS <- EMV_algo_stoch(c(beta_H1S[j-1],beta_H2S[j-1]), j, index)
beta_H1S[j] <- resuS[1]
beta_H2S[j] <- resuS[2]
if (sqrt(sum((resuS - c(beta_H1S[j-1],beta_H2S[j-1]))^2))< nu) {
message("Convergence atteinte à l'itération ", j)
break
}
}
return(c(beta_H1S, beta_H2S))
}
####EMV calling
##params
eps <- 1e-8
lambda <- 1
nu <- 1e-6
N <- 10000
## initial values
beta_1_init <- -56.5
beta_2_init <- 32
## batch
beta_H1 <- rep(NA, N)
beta_H1[1] <- beta_1_init
beta_H2 <- rep(NA, N)
beta_H2[1] <- beta_2_init
EMV_output <- resu_EMV_Batch(beta_H1, beta_H2)
beta_H1 <- EMV_output[1:N]
beta_H2 <- EMV_output[(N+1):(2*N)]
## "mini batch"
beta_H1S <- rep(NA, N)
beta_H1S[1] <- beta_1_init
beta_H2S <- rep(NA, N)
beta_H2S[1] <- beta_2_init
EMV_output_sg <- resu_EMV_SG(beta_H1S, beta_H2S)
## Convergence reached at iteration 421
beta_H1S <- EMV_output_sg[1:N]
beta_H2S <- EMV_output_sg[(N+1):(2*N)]
###Plot
df <- data.frame(
iter = 1:N,
beta_H1 = beta_H1,
beta_H2 = beta_H2,
beta_H1S = beta_H1S,
beta_H2S = beta_H2S
)
df_long <- df %>%
pivot_longer(cols = -iter, names_to = c("param", "method"),
names_pattern = "(beta_H1|beta_H2)(S?)",
values_to = "value") %>%
mutate(
param = recode(param, beta_H1 = "Beta 1", beta_H2 = "Beta 2"),
method = ifelse(method == "S", "Stochastic", "Batch")
)
ggplot(df_long, aes(x = iter, y = value, color = method)) +
geom_line() +
facet_wrap(~param, scales = "free_y") +
theme_minimal() +
labs(
title = "Convergence of Parameters Over Iterations",
x = "Iteration",
y = "Parameter Value",
color = "Algorithm"
)
## Warning: Removed 9579 rows containing missing values or values outside the scale range
## (`geom_line()`).
b0_seq <- seq(-1000, 500, length.out =1500)
b1_seq <- seq(-500, 1000, length.out = 1500)
ll_grid <- expand.grid(beta0 = b0_seq, beta1 = b1_seq)
ll_grid$loglik <- apply(ll_grid, 1, function(row) log_lik(c(row[1], row[2])))
library(ggplot2)
ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Log-vraisemblance sur la grille des paramètres")
b0_seq <- seq(-5, 5, length.out =10)
b1_seq <- seq(-5, 5, length.out = 10)
ll_grid <- expand.grid(beta0 = b0_seq, beta1 = b1_seq)
ll_grid$loglik <- apply(ll_grid, 1, function(row) log_lik(c(row[1], row[2])))
library(ggplot2)
ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Log-vraisemblance sur la grille des paramètres")
start_vec <- c(-50, 28, -1/2, 0, 0, 0, -60, 34)
H_11 <- rep(NA, N)
H_12 <- rep(NA, N)
H_21 <- rep(NA, N)
H_22 <- rep(NA, N)
H_31 <- rep(NA, N)
H_32 <- rep(NA, N)
H_41 <- rep(NA, N)
H_42 <- rep(NA, N)
z1 <- rep(NA, N)
z2 <- rep(NA, N)
z3 <- rep(NA, N)
z4 <- rep(NA, N)
for (j in 1:4) {
beta_1_init <- start_vec[2*(j-1) +1]
beta_2_init <- start_vec[2*(j-1) + 2]
beta_H1 <- rep(NA, N)
beta_H1[1] <- beta_1_init
beta_H2 <- rep(NA, N)
beta_H2[1] <- beta_2_init
EMV_output <- resu_EMV_Batch(beta_H1, beta_H2)
beta_H1 <- EMV_output[1:N]
beta_H2 <- EMV_output[(N+1):(2*N)]
z <- EMV_output[(2*N+1):(3*N)]
if (j == 1) {
H_11 <- beta_H1
H_12 <- beta_H2
z1 <- z
} else if (j == 2) {
H_21 <- beta_H1
H_22 <- beta_H2
z2 <- z
} else if (j == 3) {
H_31 <- beta_H1
H_32 <- beta_H2
z3 <- z
} else {
H_41 <- beta_H1
H_42 <- beta_H2
z4 <- z
}
}
library(plotly)
library(dplyr)
# Combine all trajectories into one data frame
traj_df <- bind_rows(
data.frame(beta0 = H_11, beta1 = H_12, loglik = z1, traj = "Start 1", iter = 1:N),
data.frame(beta0 = H_21, beta1 = H_22, loglik = z2, traj = "Start 2", iter = 1:N),
data.frame(beta0 = H_31, beta1 = H_32, loglik = z3, traj = "Start 3", iter = 1:N),
data.frame(beta0 = H_41, beta1 = H_42, loglik = z4, traj = "Start 4", iter = 1:N)
)
# Function to get starting, ending, and max-likelihood points
get_special_points <- function(df) {
df <- df %>% filter(!is.na(beta0))
start <- df %>% slice(1) %>% mutate(type = "Start")
end <- df %>% slice_tail(n=1) %>% mutate(type = "End")
max_ll <- df %>% slice_max(loglik, n=1) %>% mutate(type = "Max LL")
bind_rows(start, end, max_ll)
}
special_pts <- traj_df %>%
group_by(traj) %>%
group_modify(~get_special_points(.x)) %>%
ungroup()
# Plot
fig <- plot_ly() %>%
# Trajectories
add_trace(data = traj_df, x = ~beta0, y = ~beta1, z = ~loglik, color = ~traj,
type = 'scatter3d', mode = 'lines', name = ~traj) %>%
# Starting points
add_markers(data = special_pts %>% filter(type=="Start"),
x = ~beta0, y = ~beta1, z = ~loglik,
marker = list(size = 4, color = 'green'), name = 'Start') %>%
# Ending points
add_markers(data = special_pts %>% filter(type=="End"),
x = ~beta0, y = ~beta1, z = ~loglik,
marker = list(size = 4, color = 'red'), name = 'End') %>%
# Max log-likelihood points
add_markers(data = special_pts %>% filter(type=="Max LL"),
x = ~beta0, y = ~beta1, z = ~loglik,
marker = list(size = 4, color = 'blue'), name = 'Max LL') %>%
layout(scene = list(
xaxis = list(title = "Beta 0"),
yaxis = list(title = "Beta 1"),
zaxis = list(title = "Log-Likelihood")
),
title = "Trajectories of Beta Parameters with Starting, Ending, and Max Log-Likelihood Points"
)
# Display the figure
fig
library(ggplot2)
#install.packages("viridis")
library(viridis)
# Example: GLM coefficients
glm_coef <- coef(glm(cbind(n1, n0) ~ x, family = binomial()))
b0_seq <- seq(-500, 500, length.out =1000)
b1_seq <- seq(-500, 500, length.out = 1000)
ll_grid <- expand.grid(beta0 = b0_seq, beta1 = b1_seq)
ll_grid$loglik <- apply(ll_grid, 1, function(row) log_lik(c(row[1], row[2])))
ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
geom_tile() +
scale_fill_viridis_c() +
labs(title = "Log-vraisemblance sur la grille des paramètres")
ggplot(ll_grid, aes(x = beta0, y = beta1, fill = loglik)) +
geom_tile() +
scale_fill_viridis_c() +
geom_point(aes(x = glm_coef[1], y = glm_coef[2]), color = "red", size = 3, shape = 17) +
labs(title = "Log-vraisemblance sur la grille des paramètres",
subtitle = "Le point rouge correspond à la solution GLM") +
theme_minimal()
plot(x, n1 / (n1+n0), pch=19, xlab="x", ylab="Proportion of successes")
library(ggplot2)
# Given points
p1 <- c(beta0 = -50, beta1 = 28)
p2 <- c(beta0 = -60, beta1 = 34)
# Number of points along the line
n_pts <- 200
# Generate the line by linear interpolation
beta0_seq <- seq(p1["beta0"], p2["beta0"], length.out = n_pts)
beta1_seq <- seq(p1["beta1"], p2["beta1"], length.out = n_pts)
# Compute the log-likelihood along the line
loglik_seq <- sapply(1:n_pts, function(i) log_lik(c(beta0_seq[i], beta1_seq[i])))
# Put into a data frame
df_line <- data.frame(beta0 = beta0_seq, beta1 = beta1_seq, loglik = loglik_seq)
# Plot
ggplot(df_line, aes(x = beta0, y = beta1, fill = loglik)) +
geom_tile() +
scale_fill_viridis_c() +
geom_point(aes(x = p1["beta0"], y = p1["beta1"]), color = "green", size = 3) + # point 1
geom_point(aes(x = p2["beta0"], y = p2["beta1"]), color = "red", size = 3) + # point 2
labs(title = "Évolution de la log-vraisemblance le long de la droite",
x = expression(beta[0]), y = expression(beta[1]), fill = "Log-likelihood") +
theme_minimal()
c(-56.5,32)
## [1] -56.5 32.0
\[(Y_{ij})_{i,j} \text{ independent}, \quad \pi_j := P(Y_{ij} = 1 | x_j) \overset{logit}{=} \frac{e^{\beta_0 + \beta_1 x_j}}{1+e^{\beta_0 + \beta_1 x_j}}\]
\[\hat{v}_j = \hat{\beta}_0 + \hat{\beta}_1x_j\]
lv_hat <- -56.5 + 32*x
lv_hat
## [1] -2.3976 -1.3256 -0.3336 0.5944 1.4616 2.2808 3.0520 3.7848
ggplot(data.frame(x=x, y=lv_hat), aes(x=x, y=y)) + geom_line()
p_hat <- (n1+n0)*prob(c(-56.5,32)) # expected counts n_j * pi_hat_j (not probabilities, despite the name)
p_hat
## [1] 4.917998 12.593286 25.876627 36.084948 51.141968 53.529188 59.201865
## [8] 58.667462
Y <- (n1+n0)*prob(c(-56.5,32))
df_plot <- data.frame(
x = x,
value = c(Y, n1, (n1+n0)*prob(c(-60,34))), # GLM_R curve uses the rounded glm coefficients (-60, 34)
type = rep(c("Predicted", "Observed", "GLM_R"), each = length(x))
)
ggplot(df_plot, aes(x = x, y = value, color = type)) +
geom_line() +
geom_point() +
scale_color_manual(values = c("Predicted" = "red", "Observed" = "green", "GLM_R" = "blue")) +
labs(title = "Comparison between observations, predictions and glm_R predictions",
x = "x", y = "Y")
df_plot <- data.frame(
x = x,
value = c(prob(c(-56.5,32)), n1/(n1+n0), prob(c(-60,34))),
type = rep(c("Predicted", "Observed", "GLM_R"), each = length(x))
)
ggplot(df_plot, aes(x = x, y = value, color = type)) +
geom_line() +
geom_point() +
scale_color_manual(values = c("Predicted" = "red", "Observed" = "green", "GLM_R" = "blue")) +
labs(title = "Comparison between observations, predictions and glm_R predictions",
x = "x", y = "Proportion")
Good fit in the tails; poorer fit in the middle of the dose range.
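A sketch quantifying this with the Pearson residuals of the hand-fitted model, reusing prob() from above:
p_fit <- prob(c(-56.5, 32))
r_pearson <- (n1 - (n1 + n0)*p_fit) / sqrt((n1 + n0)*p_fit*(1 - p_fit))
round(r_pearson, 2)   # compare magnitudes across doses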
glm(cbind(n1, n0) ~ x, family = binomial(link="logit"))
##
## Call: glm(formula = cbind(n1, n0) ~ x, family = binomial(link = "logit"))
##
## Coefficients:
## (Intercept) x
## -60.72 34.27
##
## Degrees of Freedom: 7 Total (i.e. Null); 6 Residual
## Null Deviance: 284.2
## Residual Deviance: 11.23 AIC: 41.43
c(c(beta_H1[N], beta_H2[N]), c(beta_H1S[N], beta_H2S[N]))
## [1] -212.4963 120.7682 NA NA
Comments
The relation is increasing, at a rate that varies with x: it is a nonlinear relation. The NA entries for the stochastic run reflect its early stopping: iterations after convergence are left unfilled.